library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
library(RColorBrewer)
# read transactions dataset and convert to arules sparse matrix format
fcsv <- "https://raw.githubusercontent.com/multidis/hult-retail-analytics/main/shopping_cart/transactions_binary.csv"
df_trans <- read_csv(fcsv)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
trans <- transactions(as.matrix(df_trans))
summary(trans)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55 46
## 17 18 19 20 21 22 23 24 26 27 28 29 32
## 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels
## 1 frankfurter
## 2 sausage
## 3 liver loaf
# rules exceeding support and confidence thresholds
rules <- apriori(trans, parameter = list(support=0.001, confidence=0.5))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [5668 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# top 5 rules in terms of lift
inspect(head(sort(rules, by ="lift"), 5))
## lhs rhs support
## [1] {Instant food products,soda} => {hamburger meat} 0.001220132
## [2] {soda,popcorn} => {salty snack} 0.001220132
## [3] {flour,baking powder} => {sugar} 0.001016777
## [4] {ham,processed cheese} => {white bread} 0.001931876
## [5] {whole milk,Instant food products} => {hamburger meat} 0.001525165
## confidence coverage lift count
## [1] 0.6315789 0.001931876 18.99565 12
## [2] 0.6315789 0.001931876 16.69779 12
## [3] 0.5555556 0.001830198 16.40807 10
## [4] 0.6333333 0.003050330 15.04549 19
## [5] 0.5000000 0.003050330 15.03823 15
# frequency plot
itemFrequencyPlot(trans, topN = 20)

# most promising rules by lift (confidence-support scatterplot)
rules_sel <- head(sort(rules, by="lift"), 50)
plot(rules_sel, engine="plotly", control=list(col=brewer.pal(11,"Spectral")), main="")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# network representation
plot(rules_sel, method = "graph", engine = "htmlwidget")
# rules for a selected product
rules_selprod <- subset(rules, subset = rhs %pin% "pastry")
inspect(head(sort(rules_selprod, by="lift"), 10))
## lhs rhs support confidence coverage lift count
## [1] {citrus fruit,
## whole milk,
## whipped/sour cream,
## rolls/buns} => {pastry} 0.001016777 0.5 0.002033554 5.62 10